/* 
This do file counts the number of solar installations in each U.S. zip code.

NOTES: All directories and paths should be set in the main "Create_Number_Systems_Files.do" file.

*/

clear all

use `"${data_path}/openpv_all_2018-07-22.dta"'

gen num_systems=1

*drop utility installations
drop if install_type=="utility" | install_type=="Utility"

drop if missing(zipcode)

drop year
gen int year = real(substr(date_installed,-4,.))

*drop 2016-2018 data; we'll append 2016-2017 data from TTSXI below
drop if year>2015

*drop all unnecessary variables in order to append TTSXI data
drop  date_installed incentive_prog_names type size_kw appraised install_type installer cost_per_watt cost lbnl_tts_version_year lbnl_tts city utility_clean tech_1 model1_clean county annual_pv_prod annual_insolation rebate sales_tax_cost tilt1 tracking_type azimuth1 manuf2_clean manuf3_clean manuf1_clean inv_man_clean reported_annual_energy_prod incentivetype year_app_implied npv_fit_real application_implied npv_pbi_real other_incentive appraised_cluster inflation other_incentive_real zip_available cust_city pbi pbi_real pbi_length application fit_length fit_rate fit_payment _3rdparty_implied utility install_price_real_w install_price installer_clean manuf1_ inverter_reported rebate_real model1 _3rdparty inv_model_reported microinv_solarhub bipv_3 bipv_2 bipv_1 sales_tax_rate sales_tax_cost_real bipv_all thinfilm_all china sys_sizeac pbi_rate new_constr effic_1 cust_county tracking inv_model_clean mod_cost_real inv_cost_real bos_powerclerk_real permitting_real rdparty

*append TTSXI data for only 2016-2017
append using `"${data_intermediate}/NumberSystems_TTSXI_2016-2017.dta"'

recast str state

collapse (firstnm) state (sum) num_systems, by(zipcode year)

*rearrange panel data set so that there is a column for installations in each year rather than rows for zipcode-year combinations
gen num_systems_1909 = num_systems if year==1909
gen num_systems_1969 = num_systems if year==1969
gen num_systems_1983 = num_systems if year==1983
gen num_systems_1984 = num_systems if year==1984
gen num_systems_1986 = num_systems if year==1986
gen num_systems_1988 = num_systems if year==1988
gen num_systems_1992 = num_systems if year==1992
gen num_systems_1994 = num_systems if year==1994
gen num_systems_1995 = num_systems if year==1995
gen num_systems_1996 = num_systems if year==1996
gen num_systems_1997 = num_systems if year==1997
gen num_systems_1998 = num_systems if year==1998
gen num_systems_1999 = num_systems if year==1999
gen num_systems_2000 = num_systems if year==2000
gen num_systems_2001 = num_systems if year==2001
gen num_systems_2002 = num_systems if year==2002
gen num_systems_2003 = num_systems if year==2003
gen num_systems_2004 = num_systems if year==2004
gen num_systems_2005 = num_systems if year==2005
gen num_systems_2006 = num_systems if year==2006
gen num_systems_2007 = num_systems if year==2007
gen num_systems_2008 = num_systems if year==2008
gen num_systems_2009 = num_systems if year==2009
gen num_systems_2010 = num_systems if year==2010
gen num_systems_2011 = num_systems if year==2011
gen num_systems_2012 = num_systems if year==2012
gen num_systems_2013 = num_systems if year==2013
gen num_systems_2014 = num_systems if year==2014
gen num_systems_2015 = num_systems if year==2015
gen num_systems_2016 = num_systems if year==2016
gen num_systems_2017 = num_systems if year==2017

replace num_systems_1909 = 0 if missing(num_systems_1909)
replace num_systems_1969 = 0 if missing(num_systems_1969)
replace num_systems_1983 = 0 if missing(num_systems_1983)
replace num_systems_1984 = 0 if missing(num_systems_1984)
replace num_systems_1986 = 0 if missing(num_systems_1986)
replace num_systems_1988 = 0 if missing(num_systems_1988)
replace num_systems_1992 = 0 if missing(num_systems_1992)
replace num_systems_1994 = 0 if missing(num_systems_1994)
replace num_systems_1995 = 0 if missing(num_systems_1995)
replace num_systems_1996 = 0 if missing(num_systems_1996)
replace num_systems_1997 = 0 if missing(num_systems_1997)
replace num_systems_1998 = 0 if missing(num_systems_1998)
replace num_systems_1999 = 0 if missing(num_systems_1999)
replace num_systems_2000 = 0 if missing(num_systems_2000)
replace num_systems_2001 = 0 if missing(num_systems_2001)
replace num_systems_2002 = 0 if missing(num_systems_2002)
replace num_systems_2003 = 0 if missing(num_systems_2003)
replace num_systems_2004 = 0 if missing(num_systems_2004)
replace num_systems_2005 = 0 if missing(num_systems_2005)
replace num_systems_2006 = 0 if missing(num_systems_2006)
replace num_systems_2007 = 0 if missing(num_systems_2007)
replace num_systems_2008 = 0 if missing(num_systems_2008)
replace num_systems_2009 = 0 if missing(num_systems_2009)
replace num_systems_2010 = 0 if missing(num_systems_2010)
replace num_systems_2011 = 0 if missing(num_systems_2011)
replace num_systems_2012 = 0 if missing(num_systems_2012)
replace num_systems_2013 = 0 if missing(num_systems_2013)
replace num_systems_2014 = 0 if missing(num_systems_2014)
replace num_systems_2015 = 0 if missing(num_systems_2015)
replace num_systems_2016 = 0 if missing(num_systems_2016)
replace num_systems_2017 = 0 if missing(num_systems_2017)

collapse (firstnm) state (sum) num_systems (max) num_systems_1909 (max) num_systems_1969 ///
(max) num_systems_1983 (max) num_systems_1984 (max) num_systems_1986 (max) num_systems_1988 ///
(max) num_systems_1992 (max) num_systems_1994 (max) num_systems_1995 (max) num_systems_1996 (max) num_systems_1997 ///
(max) num_systems_1998 (max) num_systems_1999 (max) num_systems_2000 (max) num_systems_2001 ///
(max) num_systems_2002 (max) num_systems_2003 (max) num_systems_2004 (max) num_systems_2005 ///
(max) num_systems_2006 (max) num_systems_2007 (max) num_systems_2008 (max) num_systems_2009 ///
(max) num_systems_2010 (max) num_systems_2011 (max) num_systems_2012 (max) num_systems_2013 ///
(max) num_systems_2014 (max) num_systems_2015 (max) num_systems_2016 (max) num_systems_2017, by(zipcode)

*check to see if columns sum to the actual number of systems in each zipcode
gen check_num = num_systems_1909 + num_systems_1969 + num_systems_1983 + num_systems_1995 + num_systems_1996 + num_systems_1997 + num_systems_1998 + num_systems_1999 + num_systems_2000 + num_systems_2001 + num_systems_2002 + num_systems_2003 + num_systems_2004 + num_systems_2005 + num_systems_2006 + num_systems_2007 + num_systems_2008 + num_systems_2009 + num_systems_2010 + num_systems_2011 + num_systems_2012 + num_systems_2013 + num_systems_2014 + num_systems_2015 + num_systems_2016 + num_systems_2017
gen check = 1 if check_num==num_systems
*stop code to ensure check==1 for all observations
drop check check_num

save `"${data_intermediate}/NumberSystems_year"', replace
